#!/usr/bin/env python3
"""
Process all comments_batch_*.csv files with all models.
This will take a while - each file is processed with every model listed in
MODELS from multi_model_labeling (4 models at the time of writing).
"""

import sys
from pathlib import Path
from multi_model_labeling import process_csv_with_all_models, MODELS
import time

def process_all_batches(pattern="comments_batch_*.csv", output_dir="multi_model_results"):
    """
    Find all CSV files matching the pattern and process each with all models.

    Prints the plan (matched files, models, a rough time estimate), asks for
    interactive confirmation, then calls ``process_csv_with_all_models`` once
    per file. An error on one file is reported and the run continues with the
    next file; the final summary reports how many files actually succeeded
    and names the ones that failed.

    Args:
        pattern: Glob pattern, matched relative to the current working
            directory.
        output_dir: Output directory name, passed through to
            ``process_csv_with_all_models`` and shown in the summary.

    Returns:
        None. All progress and results are reported on stdout.

    Raises:
        SystemExit: With status 1 when the user interrupts (Ctrl+C) while a
            file is being processed.
    """
    # Find all matching files (sorted so the processing order is stable)
    files = sorted(Path('.').glob(pattern))

    if not files:
        print(f"❌ No files found matching pattern: {pattern}")
        print(f"   Current directory: {Path('.').absolute()}")
        print("\n   Make sure your CSV files are in this directory")
        return

    banner = '#' * 60
    separator = '=' * 60
    total_files = len(files)

    print(f"\n{banner}")
    print("BATCH PROCESSING - ALL FILES WITH ALL MODELS")
    print(banner)
    print(f"Found {total_files} files to process:")
    for f in files:
        print(f"  - {f.name}")
    print(f"\nModels to use ({len(MODELS)}):")
    for model in MODELS:
        print(f"  - {model}")
    print(f"\nOutput directory: {output_dir}/")

    # Estimate time
    # Rough estimate: assume ~100 comments per file, ~1.5 sec per comment per model
    estimated_minutes = (total_files * 100 * len(MODELS) * 1.5) / 60
    print(f"\n⏱️  Estimated time: ~{estimated_minutes:.0f} minutes")
    print("   (depends on number of comments per file)")

    # Confirm before starting
    try:
        response = input("\n▶️  Start processing? (y/n): ").lower().strip()
    except EOFError:
        # stdin is closed or not interactive: no answer means no consent,
        # so cancel cleanly instead of crashing with a traceback.
        print()
        response = ''
    if response != 'y':
        print("Cancelled.")
        return

    # Process each file, tracking outcomes so the summary is truthful
    start_time = time.time()
    succeeded = 0
    failed = []  # names of files whose processing raised an exception

    for i, file in enumerate(files, 1):
        print(f"\n{separator}")
        print(f"FILE {i}/{total_files}: {file.name}")
        print(separator)

        file_start = time.time()

        try:
            process_csv_with_all_models(str(file), MODELS, output_dir)
        except KeyboardInterrupt:
            print("\n\n⚠️  Processing interrupted by user")
            # Report real successes only; failed files are not "completed".
            print(f"Completed {succeeded}/{total_files} files")
            if failed:
                print(f"Failed before interruption: {', '.join(failed)}")
            sys.exit(1)
        except Exception as e:
            # Best effort: one bad file must not abort the whole batch,
            # but it is remembered for the final summary.
            failed.append(file.name)
            print(f"\n✗ Error processing {file.name}: {e}")
            print("Continuing to next file...")
        else:
            succeeded += 1
            file_elapsed = time.time() - file_start
            print(f"\n✓ File completed in {file_elapsed/60:.1f} minutes")

    # Final summary
    total_elapsed = time.time() - start_time

    print(f"\n{banner}")
    if failed:
        print(f"⚠️  FINISHED WITH ERRORS: {len(failed)} of {total_files} files failed")
    else:
        print("🎉 ALL FILES PROCESSED!")
    print(banner)
    print(f"Files processed: {succeeded}")
    if failed:
        print(f"Files failed: {len(failed)}")
        for name in failed:
            print(f"  - {name}")
    print(f"Models per file: {len(MODELS)}")
    print(f"Total time: {total_elapsed/60:.1f} minutes ({total_elapsed/3600:.1f} hours)")
    print(f"\nResults saved to: {output_dir}/")
    print("\nFile structure:")
    print("  - Individual model files: [filename]_[model].csv")
    print("  - Comparison files: [filename]_ALL_MODELS_COMPARISON.csv")

def main():
    """
    Command-line entry point.

    Reads up to two positional arguments from ``sys.argv``: the glob pattern
    and the output directory. Either may be omitted, in which case the
    default is used. If the first argument is ``-h`` or ``--help``, usage
    text is printed and the process exits with status 0.
    """
    default_pattern = "comments_batch_*.csv"
    default_output_dir = "multi_model_results"
    cli_args = sys.argv[1:]

    # Help is only recognised as the first argument.
    if cli_args and cli_args[0] in ('-h', '--help'):
        usage_lines = (
            "Usage: python process_all_batches.py [pattern] [output_dir]",
            "\nExamples:",
            "  python process_all_batches.py",
            "  python process_all_batches.py 'comments_batch_*.csv'",
            "  python process_all_batches.py 'comments_batch_*.csv' my_results",
            "\nDefault pattern: comments_batch_*.csv",
            "Default output: multi_model_results/",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(0)

    pattern = cli_args[0] if cli_args else default_pattern
    output_dir = cli_args[1] if len(cli_args) > 1 else default_output_dir

    process_all_batches(pattern, output_dir)

# Run the CLI only when this file is executed as a script, so importing the
# module does not trigger the confirmation prompt or any processing.
if __name__ == "__main__":
    main()
